# -*- coding: utf-8 -*-
"""
@Time    : 2024/7/23 11:46 
@Author  : ZhangShenao 
@File    : 3.使用Office文档加载器.py 
@Desc    : 使用Office文档加载器,包括UnstructuredExcelLoader、UnstructuredPowerPointLoader、UnstructuredWordDocumentLoader
"""
from langchain_community.document_loaders import UnstructuredWordDocumentLoader

# Load an Excel document (disabled example; UnstructuredExcelLoader must also be imported before uncommenting)
# excel_loader = UnstructuredExcelLoader(file_path="./docs/员工考勤表.xlsx")
# excel_doc = excel_loader.load()[0]
# print(f'excel page_content: {excel_doc.page_content}')
# print(f'excel metadata: {excel_doc.metadata}')

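# Load a PowerPoint document (disabled example; UnstructuredPowerPointLoader must also be imported before uncommenting)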
# powerpoint_loader = UnstructuredPowerPointLoader(file_path="./docs/章节介绍.pptx")
# powerpoint_doc = powerpoint_loader.load()[0]
# print(f'powerpoint page_content: {powerpoint_doc.page_content}')
# print(f'powerpoint metadata: {powerpoint_doc.metadata}')

# Load the Word document and show the first Document the loader returns.
word_loader = UnstructuredWordDocumentLoader(file_path="./docs/喵喵.docx")
loaded_word_docs = word_loader.load()
word_doc = loaded_word_docs[0]

# Emit the text content and the metadata on separate lines in a single call.
print(
    f'word page_content: {word_doc.page_content}',
    f'word metadata: {word_doc.metadata}',
    sep="\n",
)
